
************************************************************************************************
************************************************************************************************
*This .do file creates the PSAT data file used for the project

*Inputs:
*1. PSAT (2013-2014) and College Board (2015-2019) data files from the NCERDC

*Output:
*1. "/data_analysis/Value Added Generate/output/psat_2013_2019.dta"
************************************************************************************************
************************************************************************************************

*2013 and 2014: one PSAT file per year (mastpsat13pub / mastpsat14pub).
*Each pass writes a per-year file holding mastid, psat_score and year.
forvalues n = 3/4 {
    clear all
    use "/data/Student/PSAT/mastpsat1`n'pub.dta"

    * Work with lower-case variable names throughout
    rename *, lower

    * Composite score: sum of the three section variables, multiplied by 10
    generate psat_score = 10 * (psatvrcn + psatmrcn + psatw)

    * Records without a score or without a student id cannot be used
    drop if psat_score == . | mastid == .

    * There is no rule for choosing among duplicate records, so every
    * mastid that appears more than once is removed entirely
    tempvar ncopies
    duplicates tag mastid, generate(`ncopies')
    keep if `ncopies' == 0

    keep mastid psat_score
    generate year = 201`n'

    quietly compress
    save "/data_analysis/Value Added Generate/output/psat_201`n'.dta", replace
}

*2015: PSAT fields are read from the College Board file
clear all
use "/data/Student/SAT/collegeboard15pub.dta"

* Section scores are converted to numeric, summed, and multiplied by 10
destring PSAT_CR PSAT_MATH PSAT_WRITE, replace
generate psat_score = 10 * (PSAT_CR + PSAT_MATH + PSAT_WRITE)

drop if psat_score == . | mastid == .

* Build a test date from PSAT_TEST_DT: month is characters 1-2, the
* two-digit year is characters 5-6 (prefixed with "20"); the day is
* fixed at 15 for every record
tempvar mm yyyy test_date ncopies first_date leftover
generate `mm' = substr(PSAT_TEST_DT, 1, 2)
generate `yyyy' = "20" + substr(PSAT_TEST_DT, 5, 2)
destring `mm' `yyyy', replace
generate `test_date' = mdy(`mm', 15, `yyyy')

* For students with several records keep only those on the earliest date
duplicates tag mastid, generate(`ncopies')
egen `first_date' = min(`test_date'), by(mastid)
drop if `ncopies' > 0 & `test_date' != `first_date'

* Students still duplicated after the date rule are removed entirely
duplicates tag mastid, generate(`leftover')
drop if `leftover' > 0

keep mastid psat_score
generate year = 2015

quietly compress
save "/data_analysis/Value Added Generate/output/psat_2015.dta", replace

*2016 to 2019: College Board files that report PSAT_NMSQT_Total.
*The total is converted with a concordance table, multiplied by 10, and
*each student is reduced to at most one record before the per-year save.
forvalues n = 6/9 {
    clear all
    use "/data/Student/SAT/collegeboard1`n'pub.dta"

    * Concordance table written as pairs: PSAT_NMSQT_Total followed by the
    * concorded score (before the multiplication by 10 applied below)
    local concordance ///
        1520 221  1510 219  1500 217 ///
        1490 215  1480 213  1470 211  1460 209  1450 208  1440 206  1430 204  1420 202  1410 200  1400 199 ///
        1390 197  1380 195  1370 193  1360 192  1350 190  1340 188  1330 187  1320 185  1310 184  1300 182 ///
        1290 181  1280 179  1270 178  1260 176  1250 175  1240 173  1230 171  1220 170  1210 168  1200 167 ///
        1190 165  1180 164  1170 162  1160 161  1150 159  1140 157  1130 156  1120 154  1110 153  1100 151 ///
        1090 149  1080 148  1070 146  1060 145  1050 143  1040 142  1030 140  1020 139  1010 137  1000 136 ///
        990 134  980 133  970 131  960 130  950 128  940 127  930 125  920 124  910 122  900 121 ///
        890 120  880 118  870 117  860 115  850 114  840 112  830 111  820 109  810 107  800 106 ///
        790 104  780 103  770 101  760 99  750 98  740 96  730 95  720 93  710 91  700 90 ///
        690 88  680 87  670 86  660 85  650 84  640 83  630 82  620 81  610 80  600 79 ///
        590 78  580 77  570 76  560 75  550 74  540 73  530 73  520 72  510 71  500 70 ///
        490 69  480 68  470 67  460 66  450 65  440 64  430 63  420 62  410 61

    * Totals that match no table entry keep a missing score
    generate psat_score = .

    * Walk the table two words at a time: word i is the total, word i+1
    * is the score assigned to it
    local nwords : word count `concordance'
    forvalues i = 1(2)`nwords' {
        local j = `i' + 1
        local total : word `i' of `concordance'
        local concorded : word `j' of `concordance'
        replace psat_score = `concorded' if PSAT_NMSQT_Total == `total'
    }

    * Every total from 320 through 400 is assigned 60
    replace psat_score = 60 if inrange(PSAT_NMSQT_Total, 320, 400)

    * Put the concorded value on the 2400 scale
    replace psat_score = 10 * psat_score

    drop if psat_score == . | mastid == .

    * De-duplicate on mastid; the rule depends on which date field the
    * year's file offers
    tempvar ncopies first_date leftover
    if `n' < 8 {
        * 2016-2017: year is characters 1-4 and month is characters 6-7 of
        * PSAT_TEST_DT; the day is fixed at 15. Keep the earliest date.
        tempvar mm yyyy test_date
        generate `mm' = substr(PSAT_TEST_DT, 6, 2)
        generate `yyyy' = substr(PSAT_TEST_DT, 1, 4)
        destring `mm' `yyyy', replace
        generate `test_date' = mdy(`mm', 15, `yyyy')

        duplicates tag mastid, generate(`ncopies')
        egen `first_date' = min(`test_date'), by(mastid)
        drop if `ncopies' > 0 & `test_date' != `first_date'
        duplicates tag mastid, generate(`leftover')
        drop if `leftover' > 0
    }
    else if `n' == 8 {
        * 2018: test dates are missing and only 16 duplicates exist, so
        * duplicated students are simply removed
        duplicates tag mastid, generate(`leftover')
        drop if `leftover' > 0
    }
    else if `n' == 9 {
        * 2019: keep the record with the earliest PSAT_NMSQT_TEST_DT, then
        * remove any students who are still duplicated
        duplicates tag mastid, generate(`ncopies')
        egen `first_date' = min(PSAT_NMSQT_TEST_DT), by(mastid)
        drop if `ncopies' > 0 & PSAT_NMSQT_TEST_DT != `first_date'
        duplicates tag mastid, generate(`leftover')
        drop if `leftover' > 0
    }

    keep mastid psat_score
    generate year = 201`n'

    quietly compress
    save "/data_analysis/Value Added Generate/output/psat_201`n'.dta", replace
}


*Stack the seven per-year files, deleting each one once it has been read
clear all
forvalues yr = 2013/2019 {
    local part "/data_analysis/Value Added Generate/output/psat_`yr'.dta"
    append using "`part'"
    erase "`part'"
}

*Keep each student's first PSAT score: for a mastid present in more than
*one year, only the record from the earliest year is retained
tempvar ncopies first_year
duplicates tag mastid, generate(`ncopies')
egen `first_year' = min(year), by(mastid)
drop if `ncopies' > 0 & year != `first_year'

*Helper variables are removed so the saved file holds mastid and psat_score
drop year `ncopies' `first_year'

compress
save "/data_analysis/Value Added Generate/output/psat_2013_2019.dta", replace
